# -*- coding: utf-8 -*-
import random

import scrapy
from scrapy import Request
from scrapy.exceptions import IgnoreRequest
from scrapy.utils.project import get_project_settings
from zc_core.dao.batch_dao import BatchDao
from zc_core.dao.sku_dao import SkuDao
from zc_core.dao.sku_pool_dao import SkuPoolDao
from zc_core.dao.item_data_dao import ItemDataDao
from zc_core.util.batch_gen import time_to_batch_no
from zc_core.util.done_filter import DoneFilter
from zc_core.util.http_util import retry_request
from zc_core.model.items import Box
from suzhou.rules import *
from zc_core.spiders.base import BaseSpider


class FullSpider(BaseSpider):
    """Spider that requests the detail page of every SKU in the current batch.

    For each SKU returned by ``SkuDao.get_batch_sku_list`` that is not yet in
    the done-filter, a detail-page request is issued. The response is parsed
    into item data and, when the request was flagged, the sibling SKUs found
    on the page that are not yet stored for this batch are emitted and
    requested as well.
    """

    name = 'full'
    # Detail page URL; the placeholders are filled with (spu_id, sku_id).
    item_url = 'http://www.zfcgwssc.suzhou.gov.cn/commodities/{}?p_id={}'

    def __init__(self, batchNo=None, *args, **kwargs):
        """Create the batch record and the done-filter for this run.

        :param batchNo: batch number forwarded to ``BaseSpider``;
            ``self.batch_no`` is presumably derived from it there
            (TODO confirm against ``BaseSpider``).
        """
        super(FullSpider, self).__init__(batchNo=batchNo, *args, **kwargs)
        # Create the batch record.
        BatchDao().create_batch(self.batch_no)
        # Used to avoid collecting the same SKU twice.
        self.done_filter = DoneFilter(self.batch_no)

    def _build_list_req(self, spu_id, sku_id, cat3_id, flag):
        """Build the detail-page request for one SKU.

        :param spu_id: value for the first placeholder of ``item_url``.
        :param sku_id: value for the ``p_id`` query parameter.
        :param cat3_id: stored in the request meta as ``catalog3Id``.
        :param flag: stored in the request meta; when truthy,
            ``parse_item_data`` also follows sibling SKUs of the page.
        :return: a ``Request`` handled by ``parse_item_data``.
        """
        return Request(
            url=self.item_url.format(spu_id, sku_id),
            meta={
                'reqType': 'item',
                'batchNo': self.batch_no,
                'skuId': sku_id,
                'spuId': spu_id,
                'catalog3Id': cat3_id,
                'flag': flag
            },
            headers={
                'Host': 'www.zfcgwssc.suzhou.gov.cn',
                'Connection': 'keep-alive',
                'Cache-Control': 'max-age=0',
                'Upgrade-Insecure-Requests': '1',
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3861.400 QQBrowser/10.7.4313.400',
                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
                'Accept-Encoding': 'gzip, deflate',
                'Accept-Language': 'zh-CN,zh;q=0.9',
                # 'Cookie': f'area_id=320505; _suzhou_session={build_cookie()}',
            },
            callback=self.parse_item_data,
            # error_back is not defined in this class; presumably inherited
            # from BaseSpider (TODO confirm).
            errback=self.error_back,
            dont_filter=False
        )

    def start_requests(self):
        """Yield one detail-page request per batch SKU not collected yet.

        The target list is shuffled before the requests are generated.
        """
        settings = get_project_settings()
        # Loop-invariant, so it is read once instead of once per SKU.
        force_recover = settings.get('FORCE_RECOVER', False)

        pool_list = SkuDao().get_batch_sku_list(self.batch_no, fields={"_id": 1, "spuId": 1, "catalog3Id": 1})
        self.logger.info('全量: %s', len(pool_list))
        dist_list = [x for x in pool_list if not self.done_filter.contains(x.get('_id'))]
        self.logger.info('目标：%s', len(dist_list))
        random.shuffle(dist_list)

        for sku in dist_list:
            sku_id = sku.get('_id')
            spu_id = sku.get('spuId')
            cat3_id = sku.get('catalog3Id')
            # Avoid useless collection.
            # NOTE(review): the pre-filter above already drops done SKUs
            # regardless of FORCE_RECOVER, so this check can only fire if
            # done_filter changes after the pre-filter ran. Confirm whether
            # FORCE_RECOVER is meant to bypass the pre-filter as well.
            if self.done_filter.contains(sku_id) and not force_recover:
                self.logger.info('已采: %s', sku_id)
                continue
            # Request the detail page and follow its sibling SKUs.
            yield self._build_list_req(spu_id, sku_id, cat3_id, flag=True)

    # Handle ItemData
    def parse_item_data(self, response):
        """Parse a detail page; yield its item data and any new sibling SKUs.

        When the request meta carries a truthy ``flag``, every SKU returned by
        ``parse_sku_fx`` that is not found by ``SkuDao.get_batch_sku_list``
        for this batch is yielded as a ``Box('sku', ...)`` and followed with a
        new detail-page request. The parsed item data is yielded afterwards,
        if any.

        :param response: response of a request built by ``_build_list_req``.
        """
        meta = response.meta

        # Inside the method body this name resolves to the module-level
        # parse_item_data (star-imported from suzhou.rules), not this method.
        data = parse_item_data(response)

        if meta.get('flag'):
            for sku in parse_sku_fx(response) or ():
                spu_id = sku.get('spuId')
                sku_id = sku.get('skuId')
                item_list = SkuDao().get_batch_sku_list(self.batch_no, query={"_id": sku_id},
                                                        fields={'_id': 1})
                if len(item_list) != 0:
                    # Already stored for this batch; nothing to do.
                    continue
                cat3_id = sku.get('catalog3Id')
                self.logger.info('同款清单: spuId=%s,skuId=%s,cat=%s', spu_id, sku_id, cat3_id)
                # Hand over a fresh list that is never mutated afterwards, so
                # whoever keeps a reference to it cannot see it emptied.
                yield Box('sku', self.batch_no, [sku])
                self.logger.debug('sibling url: %s', self.item_url.format(spu_id, sku_id))
                yield self._build_list_req(spu_id, sku_id, cat3_id, flag=True)

        if data:
            self.logger.info('商品: [%s]', data.get('skuId'))
            yield data
        else:
            # The request meta built by this spider has no 'url' key, so the
            # URL is taken from the response itself.
            self.logger.info('下架: %s', response.url)